# Session setup: start from an empty workspace and a fixed project directory.
# NOTE(review): rm(list = ls()) deletes every visible object in the calling
# environment, including those of anyone who source()s this file.
rm(list = ls()) # remove all data from global environment.
# NOTE(review): the path below is specific to one machine; setwd() raises an
# error when the directory does not exist.
setwd("~/Documents/Workspace/COVID") #Set working directory
# Load libraries for importing text and plotting data
library(tidyverse)
library(ggplot2)
library(plotly)
library(formattable)
library(reshape2)
# library(sparkline)
# library(kableExtra)
# library(heatmaply)
# library(reshape)
# library(matrixStats)
library(covdata)
library(ggrepel)
library(paletteer)
library(prismatic)
In this document I’m fiddling around with the public COVID-19 data repositories.
The covdata library is a data package for R. It provides COVID-19 case data from three sources:
National level data from the European Centers for Disease Control.
State-level data for the United States from the COVID Tracking Project.
State-level and county-level data for the United States from the New York Times.
The data are provided as-is. More information about collection methods, scope, limits, and possible sources of error in the data can be found in the documentation provided by their respective sources and on Kieran Healy’s covdata site (http://kjhealy.github.io/covdata).
# Visualisations
The Netherlands. For such a small country, COVID-19 doesn’t seem to underperform here. Is the Netherlands among the top corona countries because of a high amount of testing? Hardly: Dutch laboratories are remarkably reluctant to test; even health-care staff are hardly tested.
## Convenience "not in" operator
# Element-wise negation of %in%: TRUE where an element of `x` has no match
# in `y`, FALSE otherwise. Never returns NA.
`%nin%` <- function(x, y) {
  match(x, y, nomatch = 0L) == 0L
}
## Countries to highlight
focus_cn <- c("CHN", "DEU", "GBR", "USA", "ITA", "FRA", "ESP", "POL", "NLD", "KOR")
## Colors
# One darkened palette colour per highlighted country, plus gray for the
# catch-all "ZZOTHER" group used by the plots.
cgroup_cols <- c(
  clr_darken(paletteer_d("ggsci::category20_d3"), 0.2)[seq_along(focus_cn)],
  "gray70"
)
# Name the colours so scale_color_manual() matches them to groups by name
# rather than by position. Sorting reproduces the alphabetical order the
# scale would otherwise use, and the mapping stays stable (gray stays on
# "ZZOTHER") when a highlighted country is absent from the plotted data.
names(cgroup_cols) <- c(sort(focus_cn), "ZZOTHER")
# Line chart of cumulative reported cases per country on a log2 scale, with
# every country aligned on the first day it exceeded 99 cases. Countries in
# `focus_cn` are coloured and labelled at their last observation; all others
# share the "ZZOTHER" colour group.
covnat %>%
  filter(cu_cases > 99) %>%
  # Group explicitly so min(date) and max(date) are evaluated per country;
  # otherwise the x axis is only "days since the 100th case" if covnat
  # happens to arrive already grouped.
  group_by(iso3) %>%
  mutate(
    # Plain numeric days: ggplot2 has no default scale for difftime.
    days_elapsed = as.numeric(date - min(date), units = "days"),
    # Shorten the long official names once, then derive the label from it.
    cname = recode(cname,
                   `United States` = "USA",
                   `Iran, Islamic Republic of` = "Iran",
                   `Korea, Republic of` = "South Korea",
                   `United Kingdom` = "UK"),
    # Label only the final point of each highlighted country.
    end_label = case_when(iso3 %in% focus_cn & date == max(date) ~ cname,
                          TRUE ~ NA_character_),
    cgroup = case_when(iso3 %in% focus_cn ~ iso3,
                       TRUE ~ "ZZOTHER")
  ) %>%
  ungroup() %>%
  ggplot(mapping = aes(x = days_elapsed, y = cu_cases,
                       color = cgroup, label = end_label,
                       group = cname)) +
  geom_line(size = 0.5) +
  geom_text_repel(nudge_x = 0.75,
                  segment.color = NA) +
  # "none" replaces the deprecated guides(color = FALSE).
  guides(color = "none") +
  scale_color_manual(values = cgroup_cols) +
  scale_y_continuous(labels = scales::comma_format(accuracy = 1),
                     breaks = 2^seq(4, 19, 1),
                     trans = "log2") +
  labs(x = "Days Since 100th Confirmed Case",
       y = "Cumulative Number of Reported Cases (log2 scale)",
       title = "Cumulative Reported Cases of COVID-19, Selected Countries",
       subtitle = paste("ECDC data as of", format(max(covnat$date), "%A, %B %e, %Y")),
       caption = "Data: https://www.ecdc.europa.eu/") +
  theme_minimal()
# In lines.
# Line chart of cumulative reported deaths per country on a log2 scale, with
# every country aligned on the first day it exceeded 99 deaths. Countries in
# `focus_cn` are coloured and labelled at their last observation; all others
# share the "ZZOTHER" colour group.
covnat %>%
  filter(cu_deaths > 99) %>%
  # Group explicitly so min(date) and max(date) are evaluated per country;
  # otherwise the x axis is only "days since the 100th death" if covnat
  # happens to arrive already grouped.
  group_by(iso3) %>%
  mutate(
    # Plain numeric days: ggplot2 has no default scale for difftime.
    days_elapsed = as.numeric(date - min(date), units = "days"),
    # Shorten the long official names once, then derive the label from it.
    cname = recode(cname,
                   `United States` = "USA",
                   `Iran, Islamic Republic of` = "Iran",
                   `Korea, Republic of` = "South Korea",
                   `United Kingdom` = "UK"),
    # Label only the final point of each highlighted country.
    end_label = case_when(iso3 %in% focus_cn & date == max(date) ~ cname,
                          TRUE ~ NA_character_),
    cgroup = case_when(iso3 %in% focus_cn ~ iso3,
                       TRUE ~ "ZZOTHER")
  ) %>%
  ungroup() %>%
  ggplot(mapping = aes(x = days_elapsed, y = cu_deaths,
                       color = cgroup, label = end_label,
                       group = cname)) +
  geom_line(size = 0.5) +
  geom_text_repel(nudge_x = 0.75,
                  segment.color = NA) +
  # "none" replaces the deprecated guides(color = FALSE).
  guides(color = "none") +
  scale_color_manual(values = cgroup_cols) +
  scale_y_continuous(labels = scales::comma_format(accuracy = 1),
                     breaks = 2^seq(4, 19, 1),
                     trans = "log2") +
  # Axis labels describe deaths: this chart filters on and plots cu_deaths.
  labs(x = "Days Since 100th Confirmed Death",
       y = "Cumulative Number of Reported Deaths (log2 scale)",
       title = "Cumulative Reported Deaths of COVID-19, Selected Countries",
       subtitle = paste("ECDC data as of", format(max(covnat$date), "%A, %B %e, %Y")),
       caption = "Data: https://www.ecdc.europa.eu/") +
  theme_minimal()
# And in bars.
# Countries shown in the bar charts (replaces the longer list used above).
focus_cn <- c("USA", "ITA", "ESP", "NLD")

#' Keep only the rows of selected countries
#'
#' Matches `iso3` exactly against `countries`. A regular-expression match
#' would also accept substrings, and an empty selection would collapse to the
#' pattern "" and keep every row.
#'
#' @param x Data frame with an `iso3` column of country codes.
#' @param countries Codes to keep. Defaults to the global `focus_cn`, looked
#'   up when the function is called.
#' @return `x` restricted to the matching rows, with a `group` column set to
#'   9 on every row (retained so existing callers see the same columns).
filter_countries <- function(x, countries = focus_cn) {
  kept <- dplyr::filter(x, iso3 %in% countries)
  dplyr::mutate(kept, group = 9)
}
# Cumulative deaths of the focus countries from 2 March 2020 onward.
bar <- covnat %>%
  filter_countries() %>%
  filter(date > "2020-03-01")

# One bar per day, one facet row per country.
p <- ggplot(bar, aes(x = date, y = cu_deaths)) +
  geom_bar(stat = "identity") +
  facet_grid(rows = vars(iso3))

# Convert to an interactive plotly figure and display it.
fig <- ggplotly(p)
fig
##
## To cite the package `covdata` in publications use:
##
## Kieran Healy. 2020. covdata: COVID-19 Case and Mortality Time
## Series. R package version 0.1.0,
## <http://kjhealy.github.io/covdata>.
##
## A BibTeX entry for LaTeX users is
##
## @Manual{,
## title = {covdata: COVID-19 Case and Mortality Time Series},
## author = {Kieran Healy},
## year = {2020},
## note = {R package version 0.1.0},
## url = {http://kjhealy.github.io/covdata},
## }